{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "e6f9aa61",
   "metadata": {},
   "source": [
    "#### Replication code for \"Dominating the Narrative: How Scholars Outside of Africa Define African Politics in the Top Political Science Journals\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f73e38a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load libraries\n",
    "import numpy as np \n",
    "import pandas as pd  \n",
    "import networkx as nx # (use version 2.8.8 to replicate the same node layout in the network visualizations)\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.lines import Line2D\n",
    "import warnings\n",
    "warnings.simplefilter(action='ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8164ae0f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load and preview author-paper file\n",
    "df = pd.read_csv('SciSciNet_AfricanPolitics_Data.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4c57e868",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of seed papers\n",
    "nb_papers = df['PaperID'].nunique()\n",
    "print(f'Number of seed papers: {nb_papers}')\n",
    "\n",
    "# Number of associated authors\n",
    "nb_authors = df['AuthorID'].nunique()\n",
    "print(f'Number of associated authors: {nb_authors}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "81a4205d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Code to reproduce Figure 1\n",
    "\n",
    "data = []\n",
    "for year in range(1956, 2022):\n",
    "\n",
    "    df_tmp = df[df['Year']<=year]\n",
    "\n",
    "    # Update based in Africa attribute\n",
    "    location_dict = df_tmp.groupby('AuthorID')['Author_based_in_Africa'].agg(lambda x: x.mode()).to_dict()\n",
    "    for key,value in location_dict.items():\n",
    "        if isinstance(value, (list, np.ndarray)):\n",
    "            location_dict[key]=1.\n",
    "    \n",
    "    for key, value in location_dict.items():\n",
    "        df_tmp['Author_based_in_Africa'][df_tmp['AuthorID']==key] = value\n",
    "\n",
    "    df_tmp = df_tmp.drop_duplicates(subset='AuthorID')\n",
    "\n",
    "    nb_authors = len(df_tmp)\n",
    "    nb_authors_based_in_africa = len(df_tmp[df_tmp['Author_based_in_Africa']==1])\n",
    "    nb_african_authors = len(df_tmp[df_tmp['Author_is_African']==1])\n",
    "    nb_african_authors_based_in_africa = len(df_tmp[(df_tmp['Author_based_in_Africa']==1) & (df_tmp['Author_is_African']==1)])\n",
    "\n",
    "    perc_african_authors = nb_african_authors / nb_authors\n",
    "    perc_authors_based_in_africa = nb_authors_based_in_africa / nb_authors\n",
    "    perc_african_and_based_in_africa = nb_african_authors_based_in_africa / nb_authors\n",
    "\n",
    "    data.append((year, perc_african_authors, perc_authors_based_in_africa, perc_african_and_based_in_africa))\n",
    "\n",
    "df_stats = pd.DataFrame(data, columns = [\"year\", \"perc_african_authors\", \"perc_authors_based_in_africa\", \"perc_african_and_based_in_africa\"])\n",
    "\n",
    "plt.figure()\n",
    "plt.plot(df_stats['year'],100*df_stats['perc_african_authors'], linestyle='dashed', color='green', label='African authors')\n",
    "plt.plot(df_stats['year'],100*df_stats['perc_authors_based_in_africa'], linestyle='dotted', color='blue', label='Africa-based authors')\n",
    "plt.plot(df_stats['year'],100*df_stats['perc_african_and_based_in_africa'], linestyle='dashdot', color='turquoise', label='African Africa-based authors')\n",
    "plt.ylabel('Percentage of authors in the dataset (%)')\n",
    "plt.xlabel('Year')\n",
    "plt.xlim((1965, 2021))\n",
    "plt.legend()\n",
    "plt.savefig('Figure_1.png', dpi=500)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b1bc9dc2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load and preview citation file\n",
    "df_cit = pd.read_csv('SciSciNet_AfricanPolitics_CitingCited.csv')\n",
    "df_cit.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a26d7095",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Construct the citation networks\n",
    "\n",
    "# Node attributes\n",
    "selected_columns = ['AuthorID', 'Author_Name', 'Author_is_African', 'Author_is_female']\n",
    "author_attributes = df[selected_columns].drop_duplicates('AuthorID').set_index('AuthorID').to_dict(orient='index')\n",
    "\n",
    "location_dict = df.groupby('AuthorID')['Author_based_in_Africa'].agg(lambda x: x.mode()).to_dict()\n",
    "for key,value in location_dict.items():\n",
    "    if isinstance(value, (list, np.ndarray)):\n",
    "        location_dict[key]=1.\n",
    "for key in author_attributes:\n",
    "    author_attributes[key]['Author_based_in_Africa'] = location_dict.get(key)\n",
    "\n",
    "G_citation = {}\n",
    "years = [1980, 1990, 2000, 2010, 2021]\n",
    "for year in years:\n",
    "    print(f'Building the 1956-{year} network...')\n",
    "\n",
    "    # Initialize directed graph for the citation network\n",
    "    G_citation[year] = nx.DiGraph()\n",
    "\n",
    "    # Build the network from df_cit, using df to find authors\n",
    "    for _, row in df_cit.iterrows():\n",
    "        citing_paper = row['Citing_PaperID']\n",
    "        cited_paper = row['Cited_PaperID']\n",
    "\n",
    "        y_cited = int(df[df['PaperID']==cited_paper].iloc[0]['Year'])\n",
    "\n",
    "        if y_cited<=year:\n",
    "        \n",
    "            # Find the authors involved in the citing and cited papers\n",
    "            citing_authors = df[df['PaperID'] == citing_paper]['AuthorID'].tolist()\n",
    "            cited_authors = df[df['PaperID'] == cited_paper]['AuthorID'].tolist()\n",
    "\n",
    "            if citing_authors:\n",
    "\n",
    "                y_citing = int(df[df['PaperID']==citing_paper].iloc[0]['Year'])\n",
    "                if y_citing <= year:\n",
    "            \n",
    "                    # Create directed edges between citing and cited authors\n",
    "                    for author_citing in citing_authors:\n",
    "                        for author_cited in cited_authors:\n",
    "                            if author_citing != author_cited:  # Exclude self-loops\n",
    "                                if G_citation[year].has_edge(author_citing, author_cited):\n",
    "                                    G_citation[year][author_citing][author_cited]['weight'] += 1  # Increase citation count\n",
    "                                else:\n",
    "                                    G_citation[year].add_edge(author_citing, author_cited, weight=1)  # Add edge with initial weight of 1\n",
    "\n",
    "    # Update based in Africa attribute\n",
    "    location_dict = df[df['Year']<=year].groupby('AuthorID')['Author_based_in_Africa'].agg(lambda x: x.mode()).to_dict()\n",
    "    for key,value in location_dict.items():\n",
    "        if isinstance(value, (list, np.ndarray)):\n",
    "            location_dict[key]=1.\n",
    "    for key in author_attributes:\n",
    "        author_attributes[key]['Author_based_in_Africa'] = location_dict.get(key)\n",
    "\n",
    "    # Add attributes to the graph\n",
    "    nx.set_node_attributes(G_citation[year], author_attributes)\n",
    "\n",
    "print(f'Done!')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d78144fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compute the authorty scores and visualize the networks (Figures 2-6)\n",
    "\n",
    "for year in [2021,2010,2000,1990,1980]:\n",
    "\n",
    "    # Compute authority score\n",
    "    hubs, authorities = nx.hits(G_citation[year])\n",
    "\n",
    "    threshold = 1e-10\n",
    "    # Update values in-place\n",
    "    for key in authorities:\n",
    "        if abs(authorities[key]) < threshold:\n",
    "            authorities[key] = 0\n",
    "\n",
    "    # Generate node attributes\n",
    "    node_sizes = {node: authorities[node] * 10000/4 + 10 for node in G_citation[year].nodes()}  # Size based on Authority score\n",
    "    node_colors = {}\n",
    "    female_nodes = []\n",
    "    male_nodes = []\n",
    "    node_labels = {}\n",
    "\n",
    "    # Assign colors and separate nodes by gender for shape\n",
    "    for node, data in G_citation[year].nodes(data=True):\n",
    "        african = data.get('Author_is_African')\n",
    "        based_in_africa = data.get('Author_based_in_Africa')\n",
    "        is_female = data.get('Author_is_female')\n",
    "        author_name = data.get('Author_Name')\n",
    "        author_name = author_name.strip().split()[-1]\n",
    "        \n",
    "        # Determine color\n",
    "        if african == 1 and based_in_africa == 1:\n",
    "            node_colors[node] = '#00ffff'  # Both African and based in Africa\n",
    "        elif african == 1 and based_in_africa == 0:\n",
    "            node_colors[node] = '#00ff00'   # African but not based in Africa\n",
    "        elif based_in_africa == 1:\n",
    "            node_colors[node] = '#0000ff'    # Based in Africa but not African\n",
    "        else:\n",
    "            node_colors[node] = '#000000'   # Others\n",
    "\n",
    "        # Separate nodes based on gender for different shapes\n",
    "        if is_female == 1:\n",
    "            female_nodes.append(node)  # Female nodes (triangle)\n",
    "        else:\n",
    "            male_nodes.append(node)  # Male nodes (circle)\n",
    "        \n",
    "        node_labels[node] = author_name\n",
    "\n",
    "    # Create the figure\n",
    "    plt.figure(figsize=(15, 15))\n",
    "\n",
    "    # Generate positions for the nodes based on the most recent network\n",
    "    if year==2021:\n",
    "        positions = nx.spring_layout(G_citation[year], seed=42, k=1.)\n",
    " \n",
    "    df_tmp = df[df['Year']<=year].drop_duplicates(subset='AuthorID', keep='first')\n",
    "    num_authors = len(df_tmp)\n",
    "    th = sorted(authorities.values(), reverse=True)[round(0.05*num_authors)-1]\n",
    "    filtered_nodes = [node for node, auth in authorities.items() if (auth >= th) or ((node_colors[node]!='#000000') and (auth >= th/4))]\n",
    "    node_labels = {node: node_labels[node] for node in filtered_nodes}\n",
    "\n",
    "    # Draw male nodes (circles)\n",
    "    nx.draw_networkx_nodes(G_citation[year], pos=positions, nodelist=male_nodes, \n",
    "                        node_size=[node_sizes[n] for n in male_nodes],\n",
    "                        node_color=[node_colors[n] for n in male_nodes], edgecolors='black',\n",
    "                        alpha=0.25) \n",
    "\n",
    "    # Draw female nodes (triangles)\n",
    "    nx.draw_networkx_nodes(G_citation[year], pos=positions, nodelist=female_nodes, \n",
    "                        node_size=[node_sizes[n] for n in female_nodes],\n",
    "                        node_color=[node_colors[n] for n in female_nodes], edgecolors='black',\n",
    "                        alpha=0.25, node_shape='^')\n",
    "\n",
    "    # Draw edges\n",
    "    nx.draw_networkx_edges(G_citation[year], pos=positions, edge_color='gray', alpha=0.1)\n",
    "\n",
    "    nx.draw_networkx_labels(G_citation[year], pos=positions, font_size=6, font_color=\"black\", labels=node_labels, font_weight='bold')\n",
    "\n",
    "    # Create legend manually\n",
    "    legend_elements = [\n",
    "        Line2D([0], [0], marker='o', color='black', alpha=0.25, markersize=10, markerfacecolor='#00ffff', linestyle='None', label='Both African and based in Africa'),\n",
    "        Line2D([0], [0], marker='o', color='black', alpha=0.25, markersize=10, markerfacecolor='#00ff00', linestyle='None', label='African but not based in Africa'),\n",
    "        Line2D([0], [0], marker='o', color='black', alpha=0.25, markersize=10, markerfacecolor='#0000ff', linestyle='None', label='Based in Africa but not African'),\n",
    "        Line2D([0], [0], marker='o', color='black', alpha=0.25, markersize=10, markerfacecolor='black', linestyle='None', label='Not African and not based in Africa'),\n",
    "        Line2D([0], [0], marker='o', color='black', markersize=10, markerfacecolor='white', linestyle='None', label='Male'),\n",
    "        Line2D([0], [0], marker='^', color='black', markersize=10, markerfacecolor='white', linestyle='None', label='Female')]\n",
    "    plt.legend(handles=legend_elements)\n",
    "\n",
    "    # Hide axis and show plot\n",
    "    plt.axis('off')\n",
    "    plt.title(f\"Author citation network (1956-{year})\")\n",
    "\n",
    "    plt.savefig(f\"author_citation_network_{year}.png\", bbox_inches='tight')\n",
    "\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "27c4a06f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reproduce Table 1 (Top 5% authors by authority score)\n",
    "f = open('output_for_table_1.txt', 'w')\n",
    "\n",
    "years = [1980, 1990, 2000, 2010, 2021]\n",
    "for year in years:\n",
    "    print(f'Years 1956-{year}',file=f)\n",
    "\n",
    "    hubs, authorities = nx.hits(G_citation[year])\n",
    "\n",
    "    data_entries = []\n",
    "\n",
    "    for node, data in G_citation[year].nodes(data=True):\n",
    "        african = data.get('Author_is_African', 0)\n",
    "        based_in_africa = data.get('Author_based_in_Africa', 0)\n",
    "        is_female = data.get('Author_is_female', 0)\n",
    "        author_name = data.get('Author_Name')\n",
    "        pr = authorities[node]\n",
    "        data_entries.append((node,author_name,african,based_in_africa,is_female,pr))\n",
    "    \n",
    "    df_auth_cit_net = pd.DataFrame(data_entries, columns=['AuthorID','Author_Name','Author_is_African','Author_based_in_Africa','Author_is_female','Authority'])\n",
    "    df_auth_cit_net.sort_values(by='Authority', ascending=False, inplace=True)\n",
    "\n",
    "    df2 = df[df['Year']<=year].drop_duplicates(subset='AuthorID', keep='first')\n",
    "    num_authors = len(df2)\n",
    "\n",
    "    top = 5\n",
    "    stop_index = round(top*num_authors/100)\n",
    "    df_top = df_auth_cit_net.iloc[:stop_index]\n",
    "\n",
    "    nb_authors = len(df_top)\n",
    "    nb_females = len(df_top[df_top['Author_is_female']==1])\n",
    "    nb_africans = len(df_top[df_top['Author_is_African']==1])\n",
    "    nb_based_in_africa = len(df_top[df_top['Author_based_in_Africa']==1])\n",
    "    nb_african_and_based_in_africa = len(df_top[(df_top['Author_is_African']==1) & (df_top['Author_based_in_Africa']==1)])\n",
    "    nb_african_and_female = len(df_top[(df_top['Author_is_African']==1) & (df_top['Author_is_female']==1)])\n",
    "    nb_female_and_based_in_africa = len(df_top[(df_top['Author_is_female']==1) & (df_top['Author_based_in_Africa']==1)])\n",
    "    nb_female_african_and_based_in_africa = len(df_top[(df_top['Author_is_female']==1) & (df_top['Author_based_in_Africa']==1) & (df_top['Author_is_African']==1)])\n",
    "\n",
    "    print(f\"Top {nb_authors} out of {num_authors} (i.e., {top}%) by authority score centrality:\",file=f)\n",
    "    print(f\"{nb_africans} (i.e., {round(100*nb_africans/nb_authors,2)}%) Africans ({len(df2[df2['Author_is_African']==1])} i.e. {round(100*len(df2[df2['Author_is_African']==1])/num_authors,2)}% in the complete set)\",file=f)\n",
    "    print(f\"{nb_based_in_africa} (i.e., {round(100*nb_based_in_africa/nb_authors,2)}%) based in Africa ({len(df2[df2['Author_based_in_Africa']==1])} i.e. {round(100*len(df2[df2['Author_based_in_Africa']==1])/num_authors,2)}% in the complete set)\",file=f)\n",
    "    print(f\"{nb_african_and_based_in_africa} (i.e., {round(100*nb_african_and_based_in_africa/nb_authors,2)}%) African and based in Africa ({len(df2[(df2['Author_based_in_Africa']==1) & (df2['Author_is_African']==1)])} i.e. {round(100*len(df2[(df2['Author_based_in_Africa']==1) & (df2['Author_is_African']==1)])/num_authors,2)}% in the complete set)\",file=f)\n",
    "    print(f\"{nb_females} (i.e., {round(100*nb_females/nb_authors,2)}%) females ({len(df2[df2['Author_is_female']==1])} i.e. {round(100*len(df2[df2['Author_is_female']==1])/num_authors,2)}% in the complete set)\",file=f)\n",
    "    print(f\"{nb_african_and_female} (i.e., {round(100*nb_african_and_female/nb_authors,2)}%) female and African ({len(df2[(df2['Author_is_African']==1) & (df2['Author_is_female']==1)])} i.e. {round(100*len(df2[(df2['Author_is_African']==1) & (df2['Author_is_female']==1)])/num_authors,2)}% in the complete set)\",file=f)\n",
    "    print(f\"{nb_female_and_based_in_africa} (i.e., {round(100*nb_female_and_based_in_africa/nb_authors,2)}%) female and based in Africa ({len(df2[(df2['Author_based_in_Africa']==1) & (df2['Author_is_female']==1)])} i.e. {round(100*len(df2[(df2['Author_based_in_Africa']==1) & (df2['Author_is_female']==1)])/num_authors,2)}% in the complete set)\",file=f)\n",
    "    print(f\"{nb_female_african_and_based_in_africa} (i.e., {round(100*nb_female_african_and_based_in_africa/nb_authors,2)}%) female, African, and based in Africa ({len(df2[(df2['Author_is_African']==1) & (df2['Author_is_female']==1) & (df2['Author_based_in_Africa']==1)])} i.e. {round(100*len(df2[(df2['Author_is_African']==1) & (df2['Author_is_female']==1) & (df2['Author_based_in_Africa']==1)])/num_authors,2)}% in the complete set)\",file=f)\n",
    "    print(\"\\n\",file=f)\n",
    "\n",
    "f.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "061b4256",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reproduce Table 2 (Percentages of authors who cite more non-African and non-Africa-based authors than expected by chance)\n",
    "f = open('output_for_table_2.txt', 'w')\n",
    "\n",
    "# Construct the networks taking isolated nodes into account too\n",
    "G_citation = {}\n",
    "years = [1980, 1990, 2000, 2010, 2021]\n",
    "\n",
    "for year in years:\n",
    "\n",
    "    # Initialize directed graph for the citation network\n",
    "    G_citation[year] = nx.DiGraph()\n",
    "    G_citation[year].add_nodes_from(df[df['Year']<=year]['AuthorID'].unique().tolist())\n",
    "\n",
    "    # Build the network from df_cit, using df to find authors\n",
    "    for _, row in df_cit.iterrows():\n",
    "        citing_paper = row['Citing_PaperID']\n",
    "        cited_paper = row['Cited_PaperID']\n",
    "\n",
    "        y_cited = int(df[df['PaperID']==cited_paper].iloc[0]['Year'])\n",
    "\n",
    "        if y_cited<=year:\n",
    "        \n",
    "            # Find the authors involved in the citing and cited papers\n",
    "            citing_authors = df[df['PaperID'] == citing_paper]['AuthorID'].tolist()\n",
    "            cited_authors = df[df['PaperID'] == cited_paper]['AuthorID'].tolist()\n",
    "\n",
    "            if citing_authors:\n",
    "\n",
    "                y_citing = int(df[df['PaperID']==citing_paper].iloc[0]['Year'])\n",
    "                if y_citing <= year:\n",
    "            \n",
    "                    # Create directed edges between citing and cited authors\n",
    "                    for author_citing in citing_authors:\n",
    "                        for author_cited in cited_authors:\n",
    "                            if author_citing != author_cited:  # Exclude self-loops\n",
    "                                if G_citation[year].has_edge(author_citing, author_cited):\n",
    "                                    G_citation[year][author_citing][author_cited]['weight'] += 1  # Increase citation count\n",
    "                                else:\n",
    "                                    G_citation[year].add_edge(author_citing, author_cited, weight=1)  # Add edge with initial weight of 1\n",
    "\n",
    "    # Update based in Africa attribute\n",
    "    location_dict = df[df['Year']<=year].groupby('AuthorID')['Author_based_in_Africa'].agg(lambda x: x.mode()).to_dict()\n",
    "    for key,value in location_dict.items():\n",
    "        if isinstance(value, (list, np.ndarray)):\n",
    "            location_dict[key]=1.\n",
    "    for key in author_attributes:\n",
    "        author_attributes[key]['Author_based_in_Africa'] = location_dict.get(key)\n",
    "\n",
    "    # Add attributes to the graph\n",
    "    nx.set_node_attributes(G_citation[year], author_attributes)\n",
    "\n",
    "# First row of the table (Citing authors: Non-African and non-Africa-based)\n",
    "print('Citing authors: Non-African and non-Africa-based',file=f)\n",
    "for year in years:\n",
    "\n",
    "    G = G_citation[year].copy()\n",
    "\n",
    "    # Filter nodes by attributes\n",
    "    def is_non_african_author(node):\n",
    "        return (\n",
    "            G.nodes[node].get('Author_is_African') == 0 and\n",
    "            G.nodes[node].get('Author_based_in_Africa') == 0\n",
    "        )\n",
    "    filtered_nodes = [node for node in G.nodes if is_non_african_author(node)]\n",
    "\n",
    "    # For each of these nodes, get the fraction of their *out-neighbors* with same attributes\n",
    "    fractions = []\n",
    "    for node in filtered_nodes:\n",
    "        neighbors = list(G.successors(node))\n",
    "        if not neighbors:\n",
    "            continue  # skip if no out-edges\n",
    "        count = sum(1 for n in neighbors if is_non_african_author(n))\n",
    "        fraction = count / len(neighbors)\n",
    "        fractions.append(fraction)\n",
    "\n",
    "    # Compute baseline: fraction of all nodes in G with those attributes\n",
    "    all_nodes = list(G.nodes)\n",
    "    non_african_nodes = [n for n in all_nodes if is_non_african_author(n)]\n",
    "    baseline_fraction = len(non_african_nodes) / len(all_nodes)\n",
    "\n",
    "    # Calculate how many nodes have a higher fraction than the baseline\n",
    "    above_baseline_count = sum(1 for f in fractions if f > baseline_fraction)\n",
    "    fraction_above_baseline = above_baseline_count / len(fractions)\n",
    "    print('1956 -',year,':',round(fraction_above_baseline*100,2),'%',file=f)\n",
    "\n",
    "# Second row of the table (Citing authors: African or Africa-based)\n",
    "print('Citing authors: African or Africa-based',file=f)\n",
    "for year in years:\n",
    "\n",
    "    G = G_citation[year].copy()\n",
    "    \n",
    "    # Filter nodes by attributes\n",
    "    def is_african_or_based_in_africa(node):\n",
    "        return (\n",
    "            G.nodes[node].get('Author_is_African') == 1 or\n",
    "            G.nodes[node].get('Author_based_in_Africa') == 1\n",
    "        )\n",
    "    filtered_nodes = [node for node in G.nodes if is_african_or_based_in_africa(node)]\n",
    "\n",
    "    # For each of these nodes, get the fraction of their *out-neighbors* with same attributes\n",
    "    fractions = []\n",
    "    for node in filtered_nodes:\n",
    "        neighbors = list(G.successors(node))\n",
    "        if not neighbors:\n",
    "            continue  # skip if no out-edges\n",
    "        count = sum(1 for n in neighbors if is_non_african_author(n))\n",
    "        fraction = count / len(neighbors)\n",
    "        fractions.append(fraction)\n",
    "\n",
    "    # Compute baseline: fraction of all nodes in G with those attributes\n",
    "    all_nodes = list(G.nodes)\n",
    "    non_african_nodes = [n for n in all_nodes if is_non_african_author(n)]\n",
    "    baseline_fraction = len(non_african_nodes) / len(all_nodes)\n",
    "\n",
    "    # Calculate how many nodes have a higher fraction than the baseline\n",
    "    above_baseline_count = sum(1 for f in fractions if f > baseline_fraction)\n",
    "    fraction_above_baseline = above_baseline_count / len(fractions)\n",
    "    print('1956 -',year,':',round(fraction_above_baseline*100,2),'%',file=f)\n",
    "\n",
    "f.close()\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "psarticle",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.14.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
